% This script processes the data of the herding experiment.

clear all;

% configuration

q=0.7;
firstround = 4; % first round to look at
lastround = 30; % last round to look at
maxsubj = 22; % maximum number of subjects in any session
plotsubject = [0,2,2,8]; % set to plot a subject's actions [plot,treatment,session,subject]
numup = -4:4; % number of up signals
pricethreshold = 100*q.^numup./(q.^numup+(1-q).^numup);

% data files
nbfilelist = cellstr(['session 1 no beliefs 081116.csv'; 'session 4 no beliefs 081616.csv'; 'session 5 no beliefs 082516.csv']);
%nbfilelist = cellstr(['mturk 2016-08-05 1826 1-3.csv']);
bfilelist = cellstr(['session 2 beliefs 081216.csv' ; 'session 3 beliefs 081216.csv'; 'session 6 beliefs 082516.csv']);     
        
%%%%%%%%%%%%%%
% read in data
%%%%%%%%%%%%%%

for i=1:2 % loop over treatments
            
    if i==1
        filelist = nbfilelist;
        % configpost adds a column indicating which price path is opposite
        % (used for checking symmetry)
        configfile = 'configpost.csv';
    else
        filelist = bfilelist;
        configfile = 'configpost.csv';
    end
    
    numfiles = size(filelist,1);
    ids = [];
    data = [];
    
    for f=1:numfiles
        filename = char(filelist(f))

        [tempdata,tempnumsubjects,tempids] = parseoutput(filename,configfile,q);
    
        % add to data array with file id
        data = [data; f*ones(size(tempdata,1),1) tempdata];
        ids = [ids; [tempids zeros(1,maxsubj - size(tempids,2))]]; % one row per session
        numsubjects(f) = tempnumsubjects;
    end
    
    disp('Number of subjects in treatment:')
    numsubj(i) = sum(numsubjects);
    numsubj(i)
    
    % keep only data we want to analyze
    data = data(data(:,3)>=firstround & data(:,3)<=lastround,:);

    nobs = size(data,1);  % number of observations
    
    % label columns
    session = data(:,1);
    subjectid = data(:,2);
    round = data(:,3);
    currentrow = data(:,4);
    payoff = data(:,5);
    realizedtrade = data(:,6);
    action1 = data(:,7);
    action0 = data(:,8);
    education = data(:,9);
    age = data(:,10);
    gender = data(:,11);
    turn = data(:,12);
    assetval = data(:,13);
    price = data(:,14)*100;
    contradiction = data(:,15);
    streak = data(:,16);
    opposite = data(:,17);

    %%%%%%%%%%%%%%
    % process
    %%%%%%%%%%%%%%
    
    % demographics
%     agearr = -1*ones(size(ids,1),maxsubj);
%     educationarr = -1*ones(size(ids,1),maxsubj);
%     genderarr = -1*ones(size(ids,1),maxsubj);
%     for s=1:size(ids,1) % loop over sessions
%         for j=1:numsubjects(s) % loop over subjects in session
%             identity = (session == s & subjectid == ids(s,j));
%             [unused, agearr(s,j)] = find(~isnan(age)&identity==1);
%             [unused, educationarr(s,j)] = find(~isnan(age)&identity==1);
%             [unused, genderarr(s,j)] = find(~isnan(age)&identity==1);
%         end
%     end
    agearr(:,i) = age(~isnan(age));
    educarr(:,i) = education(~isnan(education));
    genderarr(:,i) = gender(~isnan(gender));
            
    % frequency of herding, rational and contrarian behavior
    herding = (action0 == 1 & action1 == 1 & price > 50) | (action0 == -1 & action1 == -1 & price < 50);
    rational = (action0 == -1 & action1 == 1);
    contrarian = (action0 == -1 & action1 == -1 & price > 50) | (action0 == 1 & action1 == 1 & price < 50);
    notrade = (action0 == 0 & action1 == 0);
    partialherding = (action0 == -1 & action1 == 0 & price < 50) | (action0 == 0 & action1 == 1 & price > 50);
    partialcontrarian = (action0 == 0 & action1 == 1 & price < 50) | (action0 == -1 & action1 == 0 & price > 50);
    irrational = (herding ==0 & rational ==0 & contrarian == 0 & notrade == 0 & partialherding == 0 & partialcontrarian == 0);

    % define a normalized price due to symmetry around price=50
    normprice = price.*(price>=50)+(100-price).*(price<50);
    
%     disp('Frequency of each type of behavior by price:');
%     fprintf('Price Rational Irrational No Trade Herding Contrarian Partial Herding Partial Contrarian\n');
%     for p=5:9        
%         fprintf('%.3f ',pricethreshold(p));
%         fprintf('%.3f ',sum(rational(normprice==pricethreshold(p)))/size(normprice(normprice==pricethreshold(p)),1));  
%         fprintf('%.3f ',sum(irrational(normprice==pricethreshold(p)))/size(normprice(normprice==pricethreshold(p)),1));
%         fprintf('%.3f ',sum(notrade(normprice==pricethreshold(p)))/size(normprice(normprice==pricethreshold(p)),1))
%         fprintf('%.3f ',sum(herding(normprice==pricethreshold(p)))/size(normprice(normprice==pricethreshold(p)),1))
%         fprintf('%.3f ',sum(contrarian(normprice==pricethreshold(p)))/size(normprice(normprice==pricethreshold(p)),1))
%         fprintf('%.3f ',sum(partialherding(normprice==pricethreshold(p)))/size(normprice(normprice==pricethreshold(p)),1))
%         fprintf('%.3f\n',sum(partialcontrarian(normprice==pricethreshold(p)))/size(normprice(normprice==pricethreshold(p)),1))
%     end
    
    
    %%% Individual analysis
    
    type = -1*ones(size(ids,1),maxsubj);
    score = -1*ones(size(ids,1),maxsubj);
    diff0 = -1*ones(size(ids,1),maxsubj);
    diff1 = -1*ones(size(ids,1),maxsubj);
    lambda = zeros(size(ids,1),maxsubj);
    dminusa = zeros(size(ids,1),maxsubj); % delta - alpha
    rntype = -1*ones(size(ids,1),maxsubj);
    rnscore = -1*ones(size(ids,1),maxsubj);
    eutype = -1*ones(size(ids,1),maxsubj);
    euscore = -1*ones(size(ids,1),maxsubj);
    nolatype = -1*ones(size(ids,1),maxsubj);
    nolascore = -1*ones(size(ids,1),maxsubj);
    eunolatype = -1*ones(size(ids,1),maxsubj);
    eunolascore = -1*ones(size(ids,1),maxsubj);
    eupttype = -1*ones(size(ids,1),maxsubj);
    euptscore = -1*ones(size(ids,1),maxsubj);
    symtype = -1*ones(size(ids,1),maxsubj);
    symscore = -1*ones(size(ids,1),maxsubj);
    subjherd = -1*ones(size(ids,1),maxsubj);
    subjcont = -1*ones(size(ids,1),maxsubj);
    
    % match subjects to type and plot
    for s=1:size(ids,1) % loop over sessions
        for j=1:numsubjects(s) % loop over subjects in session
            identity = (session == s & subjectid == ids(s,j));

            % extract prices and actions of subject
            sprice = price(identity==1);
            saction0 = action0(identity==1);
            saction1 = action1(identity==1);
            % full prospect theory only
            [type(s,j), score(s,j),diff0(s,j),diff1(s,j)] = matchtypes(saction1,saction0,sprice,pricethreshold,[0 0 0 1 0]);
            % parameters are only applicable to prospect theory types:
            % classify expected utility types too but later ignore their
            % parameters
            [lambda(s,j), dminusa(s,j)] = findparameters(diff0(s,j),diff1(s,j),q,type(s,j)>=1000&&type(s,j)<2000,type(s,j)>=2000);
            % match expected utility only
            [eutype(s,j), euscore(s,j)] = matchtypes(saction1,saction0,sprice,pricethreshold,[1 1 0 0 0]);
            % match prospect theory, no loss aversion
            [nolatype(s,j), nolascore(s,j)] = matchtypes(saction1,saction0,sprice,pricethreshold,[0 0 1 0 0]);
            % match symmetry model
            [symtype(s,j), symscore(s,j)] = matchtypes(saction1,saction0,sprice,pricethreshold,[0 0 0 0 1]);
            
            % match EU and no LA
            [eunolatype(s,j), eunolascore(s,j)] = matchtypes(saction1,saction0,sprice,pricethreshold,[1 1 1 0 0]);
            % match EU and PT
            [eupttype(s,j), euptscore(s,j)] = matchtypes(saction1,saction0,sprice,pricethreshold,[1 1 0 1 0]);            
            
            % herding and contrarianism by subject
%             subjherd(s,j) = sum(herding(identity==1))+sum(partialherding(identity==1));
%             subjcont(s,j) = sum(contrarian(identity==1))+sum(partialcontrarian(identity==1));
            subjherd(s,j) = sum(herding(identity==1));
            subjcont(s,j) = sum(contrarian(identity==1));
            
            % create plot of subject's behavior
            if plotsubject(1) == 1 && plotsubject(2)==i && plotsubject(3)==s && plotsubject(4) == j
                figure;
                % look for repeat prices and use to shift points up to avoid
                % overlap
                multiprice = zeros(size(sprice,1),1);
                for k=2:size(sprice,1)
                    prevcount = sum(sprice(1:(k-1))==sprice(k));
                    multiprice(k) = prevcount;
                end
                saction0 = saction0+multiprice*0.05;
                saction1 = saction1+multiprice*0.05;
                sc1 = scatter(sprice,saction0,40,'red','x');
                hold on;
                sc2 = scatter(sprice,saction1,20,'blue');
                legend([sc1 sc2],'s_t=0','s_t=1');
                ylim([-1.5 1.5]);
                xlim([0 100]);
                title(['Session:',num2str(s),', Subject:', num2str(ids(s,j))])
            end
                        
        end
    end
    
    disp('EU only: risk-neutral, risk-averse, risk-seeking:');
    fprintf('%d %d %d\n',sum(sum(eutype==0)),sum(sum(eutype>0&eutype<20)),sum(sum(eutype>=20&eutype<1000)))
    disp('EU and no loss aversion: risk-neutral, risk-averse, risk-seeking, herding, contrarian subjects:');
    fprintf('%d %d %d %d %d\n',sum(sum(eunolatype==0)),sum(sum(eunolatype>0&eunolatype<20)),sum(sum(eunolatype>=20&eunolatype<1000)),sum(sum(eunolatype>=1000&eunolatype<2000)),sum(sum(eunolatype>=2000)))
    disp('EU and PT: risk-neutral, risk-averse, risk-seeking, herding, contrarian subjects:');
    fprintf('%d %d %d %d %d\n',sum(sum(eupttype==0)),sum(sum(eupttype>0&eupttype<20)),sum(sum(eupttype>=20&eupttype<1000)),sum(sum(eupttype>=1000&eupttype<2000)),sum(sum(eupttype>=2000)))
    disp('Minimum, average, and maximum match score among PT subjects:')
    fprintf('%2f %2f %2f\n',min(min(score(type>=1000))),sum(sum(score(type>=1000)))/sum(sum(type>=1000)),max(max(score(type>=1000))))
    
    % report modal type in session
    disp('Modal type, lambda, dminusa');
    modaltype=mode(mode(eupttype(eupttype~=-1)))
    [sessidx,subjidx]=find(eupttype==modaltype,1);
    [lambda(sessidx,subjidx) dminusa(sessidx,subjidx)]
    
    if i==1
        nbdata = [data herding rational contrarian notrade partialherding partialcontrarian irrational];
    else
        bdata = [data herding rational contrarian notrade partialherding partialcontrarian irrational];
    end
    
    % plot cdfs of match scores
    figure;
    hold on;
    [eucdf,eux]=ecdf([euscore(1,1:numsubjects(1)) euscore(2,1:numsubjects(2)) euscore(3,1:numsubjects(3))]);
    [nolacdf,nolax]=ecdf([nolascore(1,1:numsubjects(1)) nolascore(2,1:numsubjects(2)) nolascore(3,1:numsubjects(3))]);
    [ptcdf,ptx]=ecdf([score(1,1:numsubjects(1)) score(2,1:numsubjects(2)) score(3,1:numsubjects(3))]);
    [symcdf,symx]=ecdf([symscore(1,1:numsubjects(1)) symscore(2,1:numsubjects(2)) symscore(3,1:numsubjects(3))]);

    % downsample for markers
    eucdf_ds = mydownsample(eucdf,5);
    eux_ds = mydownsample(eux,5);
    nolacdf_ds = mydownsample(nolacdf,5);
    nolax_ds = mydownsample(nolax,5);
    ptcdf_ds = mydownsample(ptcdf,5);
    ptx_ds = mydownsample(ptx,5);
    symcdf_ds = mydownsample(symcdf,5);
    symx_ds = mydownsample(symx,5);
    
    h1 = stairs(eux,eucdf,':k','Linewidth',1);
    h2 = scatter(eux_ds,eucdf_ds,35);
    h2.Marker = 'o';
    h2.MarkerFaceColor = 'black';
    h3 = stairs(nolax,nolacdf,':k','Linewidth',1);
    h4 = scatter(nolax_ds,nolacdf_ds,35);
    h4.Marker = '^';
    h4.MarkerFaceColor = 'black';
    h5 = stairs(ptx,ptcdf,'-k','Linewidth',2);
    h6 = scatter(ptx_ds,ptcdf_ds,35);
    h6.Marker = 'diamond';
    h6.MarkerFaceColor = 'black';
    h7 = stairs(symx,symcdf,':k','Linewidth',1);
    h8 = scatter(symx_ds,symcdf_ds,35);
    h8.Marker = 'square';
    h8.MarkerFaceColor = 'black';
    legend([h2 h4 h6 h8], {'CRRA','No loss aversion','Prospect theory', 'Best symmetric'});
    xlim([0.3 1]);
    ylim([0 1]);
    xlabel('Match score');
    ylabel('CDF');
    grid on;
%     if i==1
%         title('No Beliefs');
%     else
%         title('Main');
%     end
    
    [h,p,kstat] = kstest2([euscore(1:numsubjects(1)) euscore(2:numsubjects(2)) euscore(3:numsubjects(3))],[nolascore(1:numsubjects(1)) nolascore(2:numsubjects(2)) nolascore(3:numsubjects(3))]);
    disp('p value for KS test of EU different from no LA'); 
    p
    
    [h,p,kstat] = kstest2([score(1:numsubjects(1)) score(2:numsubjects(2)) score(3:numsubjects(3))],[symscore(1:numsubjects(1)) symscore(2:numsubjects(2)) symscore(3:numsubjects(3))]);
    disp('p value for KS test of PT different from symmetric'); 
    p
    
    [h,p,kstat] = kstest2([nolascore(1:numsubjects(1)) nolascore(2:numsubjects(2)) nolascore(3:numsubjects(3))],[symscore(1:numsubjects(1)) symscore(2:numsubjects(2)) symscore(3:numsubjects(3))]);
    disp('p value for KS test of no LA different from symmetric'); 
    p
    
    [h,p,kstat] = kstest2([euscore(1:numsubjects(1)) euscore(2:numsubjects(2)) euscore(3:numsubjects(3))],[score(1:numsubjects(1)) score(2:numsubjects(2)) score(3:numsubjects(3))]);
    disp('p value for KS test of PT different from EU'); 
    p
    
    % find unique parameter pairs and number of subjects with each
    % only look at risk-neutral and PT subjects as risk-averse and
    % risk-seeking require different model
    
    % replace parameters for EU types (in case of tie, parameters assuming
    % PT may not match risk-neutral parameters)
    for s=1:size(ids,1) % loop over sessions
        for j=1:numsubjects(s) % loop over subjects in session
            if eupttype(s,j) == 0 % risk neutral
                dminusa(s,j)= 0;
                lambda(s,j)=1;
            end
        end
    end
    
    params = []; % vector is dminusa, lammbda, number of subjects
    paramidx = 1;
    for s=1:size(ids,1) % loop over sessions
        for j=1:numsubjects(s) % loop over subjects in session
            if eupttype(s,j)==0 || eupttype(s,j)>=1000 % only look at risk-neutral and herding/contrarian
                if isempty(params) % initialize with first subject parameters
                    params(1,paramidx) = dminusa(s,j);
                    params(2,paramidx) = lambda(s,j);
                    params(3,paramidx) = 1; % one subject has this pair
                    paramidx  = paramidx + 1;
                else 
                    [dminusar,dminusac] = find(dminusa(s,j)==params(1,:));
                    [lambdar,lambdac] = find(lambda(s,j)==params(2,:));
                    bothmatchidx = intersect(dminusac,lambdac);
                    if size(bothmatchidx,1)==0 || size(bothmatchidx,2)==0
                        % pair not yet found, add to list
                        params(1,paramidx) = dminusa(s,j);
                        params(2,paramidx) = lambda(s,j);
                        params(3,paramidx) = 1; % one subject has this pair
                        paramidx  = paramidx + 1;
                    elseif size(bothmatchidx,1)==1 && size(bothmatchidx,2)==1
                        % pair exists, add one more subject
                        params(3,bothmatchidx) = params(3,bothmatchidx)+ 1; 
                    else
                        error('Found more than one match for a parameter pair.');
                    end
                end
            end
        end
    end
    if i==2 % main treatment
        % save parameters for simulation
        save('pt_parameters','params','dminusa','lambda','numsubjects','ids','eupttype','score');
    end
    
    % scatter plot of parameters
    figure;
    scatter(params(1,:),params(2,:),30*params(3,:),'filled','ok');
    xlabel('\delta - \alpha');
    ylabel('\lambda');
%     if i==1
%         title('No Beliefs');
%     else
%         title('Main');
%     end
       
    % Earnings
    disp('Average earnings:');
    sum(data(:,5))/sum(numsubjects)/100*0.4 %$0.40 per 100 ECUs
    
    herdearn = 0;
    herdcount = 0;
    euearn = 0;
    eucount = 0;
    contearn = 0;
    contcount = 0;
    % realized earnings by type
    for s=1:size(ids,1) % loop over sessions
        for j=1:numsubjects(s) % loop over subjects in session
            identity = (session == s & subjectid == ids(s,j));

            % extract payoff
            spayoff = payoff(identity==1);
            searn = sum(spayoff)*0.004;
            if eupttype(s,j) >= 1000 && eupttype(s,j) < 2000 %herding
                herdearn = herdearn + searn;
                herdcount = herdcount + 1;
            end
            if eupttype(s,j) >=2000 %contrarian
                contearn = contearn + searn;
                contcount = contcount + 1;
            end
            if eupttype(s,j) <1000 % EU
                euearn = euearn + searn;
                eucount = eucount + 1;
            end
        end
    end
    disp('Average earnings for herding, contrarian, and EU:');
    [herdearn/herdcount contearn/contcount euearn/eucount]
   
    % plots of herding vs contrarianism
    h = subjherd(subjherd~=-1);
    c = subjcont(subjcont~=-1);
    t = eupttype(eupttype~=-1);
    figure;
    hold on;
    nact = lastround - firstround + 1;
    scatter(h(t>=1000&t<2000)/nact,c(t>=1000&t<2000)/nact,'^k','filled'); % herding
    scatter(h(t>=2000)/nact,c(t>=2000)/nact,'ok','filled'); % contrarians
    scatter(h(t<1000)/nact,c(t<1000)/nact,'xk'); % EU
    legend('Herding type','Contrarian type','Expected utility type');
    xlabel('Herding decisions');
    ylabel('Contrarian decisions');
    if i==1
        title('NO BELIEFS');
    else
        title('MAIN');
    end
    
    % report fraction of subjects within 0.1 of one axis or the other
    disp('Fraction of subjects with less than 10% of that violate their type:');
    sum((h/nact<=0.1)|(c/nact<=0.1))/numsubj(i)
    disp('Fraction of subjects with more than 10%:');
    sum((h/nact>0.1)&(c/nact>0.1))/numsubj(i)
    
    % correlation of herding and contrarian across subjects
    [kendallrho,kendallpval] = corr(h(t>=1000),c(t>=1000),'Type','Kendall')
    [spearmanrho,spearmanpval] = corr(h(t>=1000),c(t>=1000),'Type','Spearman')
    
end

% compare demographics
disp('Demographics t-tests');
disp('Age');
mean(agearr)
[h,p]=ttest2(agearr(:,1),agearr(:,2))
disp('Education');
mean(educarr)
[h,p]=ttest2(educarr(:,1),educarr(:,2))
disp('Gender')
mean(genderarr)
[h,p]=ttest2(genderarr(:,1),genderarr(:,2))


% find beliefs that best fit aggregate data by treatment
[publicb,bayesianb,actualbeliefs_nb] = findbeliefs(params,nbdata(:,7),nbdata(:,8),nbdata(:,14),pricethreshold);
save('actual_beliefs','actualbeliefs_nb');

% plot Bayesian and incorrect beliefs
figure;
hold on;
plot(publicb,bayesianb(:,1),'-b','Linewidth',1);
plot(publicb,actualbeliefs_nb(:,1),'-b','Linewidth',2);
plot(publicb,bayesianb(:,2),'--r','Linewidth',1);
plot(publicb,actualbeliefs_nb(:,2),'--r','Linewidth',2);
legend('Favorable Signal - Bayesian','Favorable Signal - Estimated','Unfavorable Signal - Bayesian','Unfavorable Signal - Estimated'); 
xlabel('Price');
ylabel('Belief');

%%%%%%%%%%%%%%%%%%%%%%%%%%
% write data to file
%%%%%%%%%%%%%%%%%%%%%%%%%%

header=['treatment,session,subjectid,round,currentrow,payoff,realizedtrade,action1,action0,education,age,gender,turn,assetval,price,contradiction,streak,opposite,lastassetval,assetval11,assetval00,assetval111,assetval000,herding,rational,contrarian,notrade,partialherding,partialcontrarian,irrational'];

outid = fopen('parseddata.csv', 'w+');

fprintf(outid, '%s', header);
fclose(outid);
% write treatment id and data to file
dlmwrite ('parseddata.csv',[zeros(size(nbdata,1),1) nbdata; ones(size(bdata,1),1) bdata],'roffset',1,'precision','%.4f','-append')
